What are Congress and Trump saying about the coronavirus?

This analysis looks at the number of tweets, their positive and negative sentiment, and their content for each party and the President, and at how each of these has evolved since February 1, 2020.

I utilize open data hosted online. In particular, big thanks to Alex Litel who created the Tweets of Congress repo, where I pulled congressional tweets from, and the folks running Trump Twitter Archive, where I pulled Trump’s tweets from.

The repo for this project is: https://github.com/dcosme/congress-tweets-covid19

number of tweets

How many times have Congress and the President tweeted about COVID-19?

daily

# Tweet counts at the "day" timescale, requested first as a bar chart ...
tweets %>%
  plot_number(timescale = "day", plot = "bar")

# ... and then as a line chart.
tweets %>%
  plot_number(timescale = "day", plot = "line")

weekly

# Tweet counts at the "week" timescale, requested first as a bar chart ...
tweets %>%
  plot_number(timescale = "week", plot = "bar")

# ... and then as a line chart.
tweets %>%
  plot_number(timescale = "week", plot = "line")

sentiment of tweets

How positive and negative is the content of the tweets?

Here is a list of the top 20 positive or negative words for each party and the President.

overall

# Sentiment plot over all tweets; every optional argument of plot_sentiment()
# is left at its default.
tweets %>%
  plot_sentiment()

by month

February

# February sentiment plot: window starts 2020-02-01 with duration = 28.
tweets %>%
  plot_sentiment(start_date = "2020-02-01", duration = 28)

March

# March sentiment plot: window starts 2020-03-01 with duration = 30.
tweets %>%
  plot_sentiment(start_date = "2020-03-01", duration = 30)

by week

Here is a list of the top 10 positive or negative words for each party and the President by week.

Feb 1 - Feb 7

# Weekly sentiment plot (section "Feb 1 - Feb 7"), limited to 10 words.
tweets %>%
  plot_sentiment(n_words = 10, start_date = "2020-02-01", duration = 6)

Feb 8 - Feb 14

# Weekly sentiment plot (section "Feb 8 - Feb 14"), limited to 10 words.
tweets %>%
  plot_sentiment(n_words = 10, start_date = "2020-02-08", duration = 6)

Feb 15 - Feb 21

# Weekly sentiment plot (section "Feb 15 - Feb 21"), limited to 10 words.
tweets %>%
  plot_sentiment(n_words = 10, start_date = "2020-02-15", duration = 6)

Feb 22 - Feb 28

# Weekly sentiment plot (section "Feb 22 - Feb 28"), limited to 10 words.
tweets %>%
  plot_sentiment(n_words = 10, start_date = "2020-02-22", duration = 6)

Feb 29 - Mar 6

# Weekly sentiment plot (section "Feb 29 - Mar 6"), limited to 10 words.
tweets %>%
  plot_sentiment(n_words = 10, start_date = "2020-02-29", duration = 6)

Mar 7 - Mar 13

# Weekly sentiment plot (section "Mar 7 - Mar 13"), limited to 10 words.
tweets %>%
  plot_sentiment(n_words = 10, start_date = "2020-03-07", duration = 6)

Mar 14 - Mar 20

# Weekly sentiment plot (section "Mar 14 - Mar 20"), limited to 10 words.
tweets %>%
  plot_sentiment(n_words = 10, start_date = "2020-03-14", duration = 6)

Mar 21 - Mar 27

# Weekly sentiment plot (section "Mar 21 - Mar 27"), limited to 10 words.
tweets %>%
  plot_sentiment(n_words = 10, start_date = "2020-03-21", duration = 6)

content of tweets

What are Congress and the President saying about COVID-19?

Here are the 100 most frequently used words by each party and the President.

overall

# Content plot from 2020-02-01 onward (no duration given), with 100 words
# requested. The result carries both the plot and the data behind it.
p <- plot_content(tweets, start_date = "2020-02-01", n_words = 100)
p$plot

# Row count per party in the plotted data, restricted to the three groups
# shown in the report and rendered as a pandoc table.
p$data %>%
  filter(party %in% c("democrats", "republicans", "trump")) %>%
  group_by(party) %>%
  summarise(`number of tweets` = n()) %>%
  kable(format = "pandoc")
party number of tweets
democrats 20413
republicans 18210
trump 205

by month

February

# February content plot: window starts 2020-02-01 with duration = 28,
# 100 words requested.
p <- plot_content(
  tweets,
  start_date = "2020-02-01",
  duration = 28,
  n_words = 100
)
p$plot

# Row count per party in the plotted data, rendered as a pandoc table.
p$data %>%
  filter(party %in% c("democrats", "republicans", "trump")) %>%
  group_by(party) %>%
  summarise(`number of tweets` = n()) %>%
  kable(format = "pandoc")
party number of tweets
democrats 1917
republicans 1112
trump 24

March

# March content plot: window starts 2020-03-01 with duration = 30,
# 100 words requested.
p <- plot_content(
  tweets,
  start_date = "2020-03-01",
  duration = 30,
  n_words = 100
)
p$plot

# Row count per party in the plotted data, rendered as a pandoc table.
p$data %>%
  filter(party %in% c("democrats", "republicans", "trump")) %>%
  group_by(party) %>%
  summarise(`number of tweets` = n()) %>%
  kable(format = "pandoc")
party number of tweets
democrats 18496
republicans 17098
trump 181

by week

Here are the 50 most frequently used words by each party and the President for each week in February and March.

Feb 1 - Feb 7

# Weekly content plot (section "Feb 1 - Feb 7").
# n_words is not passed, so plot_content()'s default applies
# (the section text says 50 words -- TODO confirm against the helper).
p <- plot_content(tweets, start_date = "2020-02-01", duration = 6)
p$plot

# Row count per party in the plotted data, rendered as a pandoc table.
p$data %>%
  filter(party %in% c("democrats", "republicans", "trump")) %>%
  group_by(party) %>%
  summarise(`number of tweets` = n()) %>%
  kable(format = "pandoc")
party number of tweets
democrats 199
republicans 133
trump 2

Feb 8 - Feb 14

# Weekly content plot (section "Feb 8 - Feb 14").
# n_words is not passed, so plot_content()'s default applies
# (the section text says 50 words -- TODO confirm against the helper).
p <- plot_content(tweets, start_date = "2020-02-08", duration = 6)
p$plot

# Row count per party in the plotted data, rendered as a pandoc table.
p$data %>%
  filter(party %in% c("democrats", "republicans", "trump")) %>%
  group_by(party) %>%
  summarise(`number of tweets` = n()) %>%
  kable(format = "pandoc")
party number of tweets
democrats 176
republicans 162
trump 1

Feb 15 - Feb 21

# Weekly content plot (section "Feb 15 - Feb 21").
# n_words is not passed, so plot_content()'s default applies
# (the section text says 50 words -- TODO confirm against the helper).
p <- plot_content(tweets, start_date = "2020-02-15", duration = 6)
p$plot

# Row count per party in the plotted data, rendered as a pandoc table.
p$data %>%
  filter(party %in% c("democrats", "republicans", "trump")) %>%
  group_by(party) %>%
  summarise(`number of tweets` = n()) %>%
  kable(format = "pandoc")
party number of tweets
democrats 103
republicans 80
trump 1

Feb 22 - Feb 28

# Weekly content plot (section "Feb 22 - Feb 28").
# n_words is not passed, so plot_content()'s default applies
# (the section text says 50 words -- TODO confirm against the helper).
p <- plot_content(tweets, start_date = "2020-02-22", duration = 6)
p$plot

# Row count per party in the plotted data, rendered as a pandoc table.
p$data %>%
  filter(party %in% c("democrats", "republicans", "trump")) %>%
  group_by(party) %>%
  summarise(`number of tweets` = n()) %>%
  kable(format = "pandoc")
party number of tweets
democrats 1222
republicans 682
trump 19

Feb 29 - Mar 6

# Weekly content plot (section "Feb 29 - Mar 6").
# n_words is not passed, so plot_content()'s default applies
# (the section text says 50 words -- TODO confirm against the helper).
p <- plot_content(tweets, start_date = "2020-02-29", duration = 6)
p$plot

# Row count per party in the plotted data, rendered as a pandoc table.
p$data %>%
  filter(party %in% c("democrats", "republicans", "trump")) %>%
  group_by(party) %>%
  summarise(`number of tweets` = n()) %>%
  kable(format = "pandoc")
party number of tweets
democrats 2580
republicans 2801
trump 24

Mar 7 - Mar 13

# Weekly content plot (section "Mar 7 - Mar 13").
# n_words is not passed, so plot_content()'s default applies
# (the section text says 50 words -- TODO confirm against the helper).
p <- plot_content(tweets, start_date = "2020-03-07", duration = 6)
p$plot

# Row count per party in the plotted data, rendered as a pandoc table.
p$data %>%
  filter(party %in% c("democrats", "republicans", "trump")) %>%
  group_by(party) %>%
  summarise(`number of tweets` = n()) %>%
  kable(format = "pandoc")
party number of tweets
democrats 5437
republicans 4171
trump 55

Mar 14 - Mar 20

# Weekly content plot (section "Mar 14 - Mar 20").
# n_words is not passed, so plot_content()'s default applies
# (the section text says 50 words -- TODO confirm against the helper).
p <- plot_content(tweets, start_date = "2020-03-14", duration = 6)
p$plot

# Row count per party in the plotted data, rendered as a pandoc table.
p$data %>%
  filter(party %in% c("democrats", "republicans", "trump")) %>%
  group_by(party) %>%
  summarise(`number of tweets` = n()) %>%
  kable(format = "pandoc")
party number of tweets
democrats 9627
republicans 9459
trump 82

Mar 21 - Mar 27

# Weekly content plot (section "Mar 21 - Mar 27").
# n_words is not passed, so plot_content()'s default applies
# (the section text says 50 words -- TODO confirm against the helper).
p <- plot_content(tweets, start_date = "2020-03-21", duration = 6)
p$plot

# Row count per party in the plotted data, rendered as a pandoc table.
p$data %>%
  filter(party %in% c("democrats", "republicans", "trump")) %>%
  group_by(party) %>%
  summarise(`number of tweets` = n()) %>%
  kable(format = "pandoc")
party number of tweets
democrats 1069
republicans 722
trump 21

browse tweets

Who’s tweeting the most and what are they tweeting?

number of tweets

# Interactive table of how many rows of `tweets` each account contributes,
# largest count first. Rows whose party is NA are dropped up front.
tweets %>%
  filter(!is.na(party)) %>%
  group_by(
    state_name, title, first, last,
    twitter_handle, party, gender, ethnicity
  ) %>%
  summarise(n = n()) %>%
  # arrange() ignores grouping by default, so ungrouping first gives the
  # same row order as sorting the still-grouped result.
  ungroup() %>%
  arrange(desc(n)) %>%
  rename(
    state = state_name,
    `twitter handle` = twitter_handle
  ) %>%
  # NOTE(review): the factor conversion is presumably there so DT offers
  # select-style column filters instead of text search -- confirm in DT docs.
  mutate(
    state = as.factor(state),
    title = as.factor(title),
    party = as.factor(party),
    gender = as.factor(gender),
    ethnicity = as.factor(ethnicity)
  ) %>%
  DT::datatable(filter = "top", rownames = FALSE)

word counts

# Interactive table of per-account word counts, most frequent first.
#
# Rows whose party is NA are dropped, tweet text is split into one word per
# row, and words are counted within each account. Stop words, words matching
# the `ignore_root_words` pattern, and words listed in `ignore_words` are
# then removed from the counted result.
tweets %>%
  filter(!is.na(party)) %>%
  # Only the account columns and the text are needed: `day` is not part of
  # the grouping and count() discards it, so it is no longer selected.
  select(
    state_name, title, first, last,
    twitter_handle, party, gender, ethnicity, text
  ) %>%
  # No arrange() before tokenising: count(sort = TRUE) orders the output by
  # n regardless of input row order, so a prior sort was wasted work.
  unnest_tokens(word, text) %>%
  group_by(
    state_name, title, first, last,
    twitter_handle, party, gender, ethnicity
  ) %>%
  count(word, sort = TRUE) %>%
  anti_join(stop_words, by = "word") %>%
  filter(!grepl(ignore_root_words, word)) %>%
  filter(!word %in% ignore_words) %>%
  ungroup() %>%
  rename(
    state = state_name,
    `twitter handle` = twitter_handle
  ) %>%
  # NOTE(review): the factor conversion is presumably there so DT offers
  # select-style column filters instead of text search -- confirm in DT docs.
  mutate(
    state = as.factor(state),
    title = as.factor(title),
    party = as.factor(party),
    gender = as.factor(gender),
    ethnicity = as.factor(ethnicity)
  ) %>%
  DT::datatable(filter = "top", rownames = FALSE)

tweets

To browse all tweets, download all_tweets.csv

# Write every tweet (including rows with no party) to all_tweets.csv,
# relative to the working directory, ordered by Twitter handle.
tweets %>%
  select(
    state_name, title, first, last,
    twitter_handle, party, gender, ethnicity, day, text
  ) %>%
  arrange(twitter_handle) %>%
  rename(
    state = state_name,
    `twitter handle` = twitter_handle,
    date = day
  ) %>%
  mutate(
    state = as.factor(state),
    title = as.factor(title),
    party = as.factor(party),
    gender = as.factor(gender),
    ethnicity = as.factor(ethnicity)
  ) %>%
  # The piped data frame is the first argument of write.csv().
  write.csv(file = "all_tweets.csv", row.names = FALSE)